This sets up an agent-based model that exhibits complexity. We will use it to for the following workshop, where you will attempt to optimize over the output from the model.
The model is a square grid of cells, wrapping on both axes, with one agent per cell. Each agent has a value that it can adjust up or down. An agent wants to have a higher value than three of its neighbors, but does not want to be the highest. Agents will:
# From https://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
maxN <- function(x, N=2){
len <- length(x)
if(N>len){
warning('N greater than length(x). Setting N=length(x)')
N <- length(x)
}
sort(x,partial=len-N+1)[len-N+1]
}
# End sourced code
# Gets the change the agent performs based on its neighbors (not the new value)
getAgentChange <- function(agentValue, neighborValues)
{
neighborValues = c(neighborValues, agentValue)
if (agentValue >= max(neighborValues))
return (-3)
if (agentValue < mean(neighborValues))
return (1)
if (agentValue >= mean(neighborValues) && agentValue < maxN(neighborValues, 2))
return (1)
return (0)
}
# Builds a world with the specified height and width.
# startingValues should be a vector, with one value
# per cell (applied down each column, then across each row).
buildWorld <- function(height, width, startingValues = NULL)
{
if (is.null(startingValues))
{
startingValues = rep(100, height * width)
}
densityValue = 1 / (height * width)
sumStartValues = sum(startingValues)
world <- data.frame(X=0, Y=0, Value=startingValues[1], Density = densityValue)
i = 1
for (x in 1:width)
{
for (y in 1:height)
{
world[i,]$X = x
world[i,]$Y = y
world[i,]$Value = startingValues[i]
world[i,]$Density = startingValues[i] / sumStartValues
i = i + 1
}
}
return (world)
}
# Create and graph an initial world
world = buildWorld(16, 16, seq(100, 125.6, 0.1))
ggplot(world, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
geom_contour(colour = "white")
# Updates the passed in world according to the updateFunction
# Neighbors are only the adjacent four. If includeDiagonals is
# true, then all eight adjacent cells are counted as neighbors.
updateWorld <- function(world, updateFunction, includeDiagonals = FALSE)
{
newWorld = buildWorld(max(world$X), max(world$Y))
for (x in 1:max(world$X))
{
xLower = x - 1
if (xLower == 0) xLower = max(world$X)
xUpper = x + 1
if (xUpper > max(world$X)) xUpper = 1
for (y in 1:max(world$Y))
{
yLower = y - 1
if (yLower == 0) yLower = max(world$Y)
yUpper = y + 1
if (yUpper > max(world$Y)) yUpper = 1
neighborValues = c()
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == y,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == y,]$Value)
neighborValues = c(neighborValues, world[world$X == x & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == x & world$Y == yLower,]$Value)
if (includeDiagonals)
{
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yLower,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yUpper,]$Value)
neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yLower,]$Value)
}
currentValue = world[newWorld$X == x & newWorld$Y == y,]$Value
newWorld[newWorld$X == x & newWorld$Y == y,]$Value = currentValue + updateFunction(currentValue, neighborValues)
}
}
newWorld$Density = newWorld$Value / sum(newWorld$Value)
return (newWorld)
}
# Update the world and display its new landscape
world = updateWorld(world, getAgentChange)
ggplot(world, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
geom_contour(colour = "white")
# Assembles the base data - what elevation is each coordinate at for each time step
buildTimeWorld <- function(width, height, iterations)
{
lastStep = buildWorld(width, height, seq(100, 100+(height * width * 0.1), 0.1))
lastStep$Time = rep(1, nrow(lastStep))
totalData = lastStep
for (t in 2:iterations)
{
newData = updateWorld(lastStep, getAgentChange)
newData$Time = rep(t, nrow(newData))
lastStep = newData
totalData = rbind(totalData, newData)
}
return(totalData)
}
# Given a set of points over time, outputs the optimal point at each time step
getOptimalData <- function(timeWorld)
{
optimalData = data.frame(OptimalX = 0, OptimalY = 0, Time = 0)
i = 1
for (t in min(timeWorld$Time):max(timeWorld$Time))
{
timeSubset = timeWorld[timeWorld$Time == t,]
optimalData[i,]$OptimalX = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$X
optimalData[i,]$OptimalY = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$Y
optimalData[i,]$Time = t
i = i + 1
}
return (optimalData)
}
# Given a set of points over time and an optimization function, returns the optimization function's
# guess of optimality for each time step
getGuesses <- function(timeWorld, optimizeFunction, ticksPerTime = 1, ...)
{
guessData = data.frame(GuessedX = 0, GuessedY = 0, Time = 0)
xGuess = floor(mean(timeWorld$X))
yGuess = floor(mean(timeWorld$Y))
i = 1
for (t in min(timeWorld$Time):max(timeWorld$Time))
{
timeSubset = timeWorld[timeWorld$Time == t,]
guesses = optimizeFunction(timeSubset, ticksPerTime, xGuess, yGuess, ...)
xGuess = guesses[1]
yGuess = guesses[2]
guessData[i,]$GuessedX = xGuess
guessData[i,]$GuessedY = yGuess
guessData[i,]$Time = t
i = i + 1
}
return (guessData)
}
# Horrible ugly monster function to encapsulate a bunch of work.
# Builds a world (or reuses the one passed in a timeWorld). Uses that to obtain optimal points
# (if includeOptimalPoints is true), as well as run an optimization function (if optimizeFunction is not
# null). Returns an object with three dataframes (or NULLs) - worldData, optimalPoints, and guessedPoints.
assembleData <- function (width = 17, height = 17, iterations = 50, timeWorld = NULL, optimizeFunction = NULL, ticksPerTime = 1, includeOptimalPoints = T, ...)
{
finalData <- list(worldData = NULL, optimalPoints = NULL, guessedPoints = NULL)
if (is.null(timeWorld))
{
finalData$worldData = buildTimeWorld(width, height, iterations)
} else {
finalData$worldData = timeWorld
}
if (!is.null(optimizeFunction))
{
finalData$guessedPoints = getGuesses(finalData$worldData, optimizeFunction, ticksPerTime, ...)
}
if (includeOptimalPoints)
{
finalData$optimalPoints = getOptimalData(finalData$worldData)
}
return (finalData)
}
# Assembling a world only
finalData = assembleData(16, 16, 50)
# Save out the world for reuse
timeWorld = finalData$worldData
# See how much faster this is now?
finalData = assembleData(timeWorld = timeWorld)
visualizeFunction <- function(allData, fromtime = 25, untiltime = 50)
{
fromtime = max(min(fromtime, max(allData$worldData$Time)), min(allData$worldData$Time))
untiltime = max(min(untiltime, max(allData$worldData$Time)), min(allData$worldData$Time))
worldData = allData$worldData[allData$worldData$Time >= fromtime & allData$worldData$Time <= untiltime,]
# Assemble base animation
animation = ggplot(worldData, aes(X, Y, z = Density)) +
geom_raster(aes(fill = Density)) +
transition_states(Time, transition_length = 10, state_length = 0, wrap=F)
labs(title = "Time: {closest_state}") +
enter_fade() +
exit_fade() +
ease_aes("linear")
# Add optimal points, if present
if (!is.null(allData$optimalPoints))
{
optData = allData$optimalPoints[allData$optimalPoints$Time >= fromtime & allData$optimalPoints$Time <= untiltime,]
lengthenFactor = nrow(worldData) / nrow(optData)
final = optData
for(i in 2:lengthenFactor) final = rbind(final, optData)
optData = final
optData = optData[with(optData, order(Time)), ]
animation = animation + geom_point(x=optData$OptimalX, y=optData$OptimalY, colour="hotpink", size=4)
}
# Add guessed points, if present
if (!is.null(allData$guessedPoints))
{
guessData = allData$guessedPoints[allData$guessedPoints$Time >= fromtime & allData$guessedPoints$Time <= untiltime,]
lengthenFactor = nrow(worldData) / nrow(guessData)
final = guessData
for(i in 2:lengthenFactor) final = rbind(final, guessData)
guessData = final
guessData = guessData[with(guessData, order(Time)), ]
animation = animation + geom_point(x=guessData$GuessedX, y=guessData$GuessedY, colour="lawngreen", size=4)
}
# Display animation
animation
}
visualizeFunction(finalData)
All functions have a standardized signature:
# Returns the average distance between guessed and optimal points
evaluate <- function (allData, fromtime = 25, untiltime = 50)
{
if (is.null(allData$optimalPoints) || is.null(allData$guessedPoints))
{
stop("Incomplete data for evaluate; allData is missing either optimalPoints or guessedPoints")
}
expected = allData$optimalPoints
actual = allData$guessedPoints
fromtime = max(min(fromtime, max(expected$Time)), min(expected$Time))
untiltime = max(min(untiltime, max(expected$Time)), min(expected$Time))
xLength = max(allData$worldData$X) - min(allData$worldData$X) + 1
yLength = max(allData$worldData$Y) - min(allData$worldData$Y) + 1
averageDistance = 0
for (t in fromtime:untiltime)
{
xLower = min(expected[expected$Time == t,]$OptimalX, actual[actual$Time == t,]$GuessedX)
xUpper = max(expected[expected$Time == t,]$OptimalX, actual[actual$Time == t,]$GuessedX)
yLower = min(expected[expected$Time == t,]$OptimalY, actual[actual$Time == t,]$GuessedY)
yUpper = max(expected[expected$Time == t,]$OptimalY, actual[actual$Time == t,]$GuessedY)
baseDistance = sqrt((xUpper - xLower) ^ 2 + (yUpper - yLower) ^ 2)
xWrap = sqrt((xUpper - (xLower + xLength)) ^ 2 + (yUpper - yLower) ^ 2)
yWrap = sqrt((xUpper - xLower) ^ 2 + (yUpper - (yLower + yLength)) ^ 2)
bothWrap = sqrt((xUpper - (xLower + xLength)) ^ 2 + (yUpper - (yLower + yLength)) ^ 2)
averageDistance = averageDistance + min(baseDistance, xWrap, yWrap, bothWrap)
}
averageDistance = averageDistance / (untiltime - fromtime + 1)
return (averageDistance)
}
# Returns accuracy as a percent of maximum possible distance
evaluatePercent <- function (allData, fromtime = 25, untiltime = 50)
{
avgDistance = evaluate(allData, fromtime, untiltime)
xLength = max(allData$worldData$X) - min(allData$worldData$X) + 1
yLength = max(allData$worldData$Y) - min(allData$worldData$Y) + 1
percent = avgDistance / sqrt((xLength / 2) ^ 2 + (yLength / 2) ^ 2)
percent = 1 - percent
return (percent)
}
# Returns percent of measured timesteps where the guessed point is the same as the optimal point
evaluateSyncCount <- function(allData, fromtime = 25, untiltime = 50)
{
if (is.null(allData$optimalPoints) || is.null(allData$guessedPoints))
{
stop("Incomplete data for evaluate; allData is missing either optimalPoints or guessedPoints")
}
expected = allData$optimalPoints
actual = allData$guessedPoints
fromtime = max(min(fromtime, max(expected$Time)), min(expected$Time))
untiltime = max(min(untiltime, max(expected$Time)), min(expected$Time))
syncCount = 0
for (t in fromtime:untiltime)
{
xGuess = actual[actual$Time == t,]$GuessedX
yGuess = actual[actual$Time == t,]$GuessedY
xOptimal = expected[expected$Time == t,]$OptimalX
yOptimal = expected[expected$Time == t,]$OptimalY
if (xGuess == xOptimal && yGuess == yOptimal)
{
syncCount = syncCount + 1
}
}
syncCount = syncCount / (untiltime - fromtime + 1)
return (syncCount)
}
randomGuesses <- function(currentData, ticks, startX = 1, startY = 1)
{
bestX = round(runif(1, min = min(currentData$X), max = max(currentData$X)))
bestY = round(runif(1, min = min(currentData$Y), max = max(currentData$Y)))
return (c(bestX, bestY))
}
hillClimb <- function(currentData, ticks, startX = 1, startY = 1)
{
bestX = startX
bestY = startY
bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
for (i in 1:ticks)
{
xLower = bestX - 1
if (xLower == 0) xLower = max(currentData$X)
xUpper = bestX + 1
if (xUpper > max(currentData$X)) xUpper = 1
yLower = bestY - 1
if (yLower == 0) yLower = max(currentData$Y)
yUpper = bestY + 1
if (yUpper > max(currentData$Y)) yUpper = 1
if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > bestValue)
{
bestX = xLower
bestValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > bestValue)
{
bestX = xUpper
bestValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > bestValue)
{
bestY = yUpper
bestValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > bestValue)
{
bestY = yLower
bestValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
}
}
return (c(bestX, bestY))
}
Accepts two additional parameters:
linearCoolingFunction <- function (x) { return(1 - 0.1 * x) }
powerCoolingFunction <- function (x) { return(0.75 ^ x) }
decayCoolingFunction <- function (x) { return(1 / (x + 1)) }
simulatedAnnealing <- function(currentData, ticks, startX = 1, startY = 1, coolingFunction = NULL, temperatureTickAmount = NULL)
{
if (is.null(coolingFunction))
{
stop("Simulated Annealing requires CoolingFunction!")
}
if (is.null(temperatureTickAmount))
{
stop("Simulated Annealing requires TemperatureTickAmount!")
}
bestX = startX
bestY = startY
bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
currentCoolingValue = 0
for (i in 1:ticks)
{
xLower = bestX - 1
if (xLower == 0) xLower = max(currentData$X)
xUpper = bestX + 1
if (xUpper > max(currentData$X)) xUpper = 1
yLower = bestY - 1
if (yLower == 0) yLower = max(currentData$Y)
yUpper = bestY + 1
if (yUpper > max(currentData$Y)) yUpper = 1
nextX = bestX
nextY = bestY
nextValue = 0
if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > nextValue)
{
nextX = xLower
nextValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > nextValue)
{
nextX = xUpper
nextValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > nextValue)
{
nextY = yUpper
nextValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
}
if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > nextValue)
{
nextY = yLower
nextValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
}
# The part that makes this simulated annealing, not hill climbing
currentCoolingValue = currentCoolingValue + temperatureTickAmount
if ((nextValue < bestValue && runif(1) < coolingFunction(currentCoolingValue)) ||
nextValue > bestValue)
{
bestValue = nextValue
bestX = nextX
bestY = nextY
}
}
return (c(bestX, bestY))
}
dtob <- function(number, bits = 4)
{
return (as.integer(rev(intToBits(number)[1:bits])))
}
btod <- function(bits)
{
return (sum(bits * 2^seq(0, length(bits) - 1, 1)))
}
getChildren <- function(parents)
{
splitPosition = floor(runif(1, min = 0, max = length(parents[[1]])))
child2 = c(parents[[2]][1:splitPosition], parents[[1]][(splitPosition+1):length(parents[[1]])])
child1 = c(parents[[1]][1:splitPosition], parents[[2]][(splitPosition+1):length(parents[[2]])])
return (list(child1, child2))
}
mutate <- function(population, mutationRate = 0.05)
{
for (i in 1:length(population))
{
for (j in 1:length(population[[i]]))
{
if (runif(1, 0, 1) < mutationRate)
{
population[[i]][j] = 1 - population[[i]][j]
}
}
}
return (population)
}
getNextParents <- function(population, currentData, xBits, yBits, xMin, yMin)
{
values = c()
for (i in 1:length(population))
{
values[i] = currentData[
currentData$X == btod(population[[i]][1:xBits])+xMin &
currentData$Y == btod(population[[i]][(xBits+1):(xBits+yBits)])+yMin,]$Value
}
return(rev(population[order(values)])[1:2])
}
geneticAlgorithm <- function(currentData, ticks, startX = 1, startY = 1, mutationRate = 0.05)
{
xMin = min(currentData$X)
yMin = min(currentData$Y)
xRange = max(currentData$X) - xMin + 1
yRange = max(currentData$Y) - yMin + 1
xBits = ceiling(log2(xRange))
yBits = ceiling(log2(yRange))
randomParent = c(sample(c(0, 1), xBits, replace=TRUE), sample(c(0, 1), yBits, replace=TRUE))
baseParent = c(dtob(startX - xMin, xBits), dtob(startY - yMin, yBits))
parents = list(randomParent, baseParent)
for (i in 1:ticks)
{
children = getChildren(parents)
population = c(mutate(children), mutate(parents))
parents = getNextParents(population, currentData, xBits, yBits, xMin, yMin)
}
return (c(btod(parents[[1]][1:xBits])+xMin, btod(parents[[1]][(xBits+1):(xBits+yBits)])))
}
randomOutput = assembleData(timeWorld = timeWorld, optimizeFunction = randomGuesses, ticksPerTime = 1)
cat("Random guesses were, on average, off by", evaluate(randomOutput), "units.\n")
## Random guesses were, on average, off by 5.711964 units.
sprintf("This is %.2f percent accurate.", evaluatePercent(randomOutput) * 100)
## [1] "This is 49.51 percent accurate."
sprintf("This synced with optimal output %.2f percent of the time.", evaluateSyncCount(randomOutput) * 100)
## [1] "This synced with optimal output 0.00 percent of the time."
# visualizeFunction(randomOutput)
hillClimbOutput = assembleData(timeWorld = timeWorld, optimizeFunction = hillClimb, ticksPerTime = 10)
cat("Hill climbing was, on average, off by", evaluate(hillClimbOutput), "units.\n")
## Hill climbing was, on average, off by 7.094354 units.
sprintf("This is %.2f percent accurate.", evaluatePercent(hillClimbOutput) * 100)
## [1] "This is 37.29 percent accurate."
sprintf("This synced with optimal output %.2f percent of the time.", evaluateSyncCount(hillClimbOutput) * 100)
## [1] "This synced with optimal output 0.00 percent of the time."
# visualizeFunction(hillClimbOutput)
saOutput = assembleData(timeWorld = timeWorld, optimizeFunction = simulatedAnnealing, ticksPerTime = 100, coolingFunction = decayCoolingFunction, temperatureTickAmount = 0.05)
cat("Simulated Annealing was, on average, off by", evaluate(saOutput), "units.\n")
## Simulated Annealing was, on average, off by 6.149471 units.
sprintf("This is %.2f percent accurate.", evaluatePercent(saOutput) * 100)
## [1] "This is 45.65 percent accurate."
sprintf("This synced with optimal output %.2f percent of the time.", evaluateSyncCount(saOutput) * 100)
## [1] "This synced with optimal output 3.85 percent of the time."
# visualizeFunction(saOutput)
gaOutput = assembleData(timeWorld = timeWorld, optimizeFunction = geneticAlgorithm, ticksPerTime = 10, mutationRate = 0.05)
cat("Genetic Algorithm was, on average, off by", evaluate(gaOutput), "units.\n")
## Genetic Algorithm was, on average, off by 5.964251 units.
sprintf("This is %.2f percent accurate.", evaluatePercent(gaOutput) * 100)
## [1] "This is 47.28 percent accurate."
sprintf("This synced with optimal output %.2f percent of the time.", evaluateSyncCount(gaOutput) * 100)
## [1] "This synced with optimal output 0.00 percent of the time."
# visualizeFunction(gaOutput)